require(ggplot2)
require(scales)

# Set locale
# Switch the time locale to Italian; per the R documentation LC_TIME affects
# how dates are formatted (month and weekday names).
# NOTE(review): the locale name "it_IT" is platform-dependent. When it cannot
# be honoured Sys.setlocale() only warns and returns "", it does not stop.
Sys.setlocale("LC_TIME", "it_IT")

# Important dates
# Reference dates used further down as period boundaries and as positions of
# the dotted vertical markers in the plots.
date_min <- as.Date("1992-01-01")
date_vday_07 <- as.Date("2007-09-08")
date_admin_election_10 <- as.Date("2010-05-30")
# (This assignment used to appear twice in a row; one copy is enough.)
date_admin_elections_12 <- as.Date("2012-05-06")
date_sicily_election_12 <- as.Date("2012-10-28")
date_gener_election_13 <- as.Date("2013-02-24")
# NOTE(review): the 2014 European election ran 22-25 May across the EU and
# Italy voted on 25 May -- confirm that 22 May is the intended marker.
date_europ_election_14 <- as.Date("2014-05-22")
date_max <- as.Date("2016-01-31")

# Actual perc
# Vote share (in percent) obtained at each election. The 2010 and 2012 values
# are aggregated from the rows of the spreadsheet; 2013 and 2014 are entered
# as constants.
library(xlsx)

# Convert a spreadsheet column to numeric via character, so that a factor
# column yields its labels rather than its integer codes.
xlsx_to_numeric <- function(x) as.numeric(as.character(x))

tmp2010 <- read.xlsx2("m5s-elections.xlsx", # Not included
                      sheetIndex = 1,
                      endRow = 6)
# 2010: votes over valid ballots (voters minus invalid ballots).
perc_admin_elections_10 <-
  sum(xlsx_to_numeric(tmp2010$voti_m5s)) /
  (sum(xlsx_to_numeric(tmp2010$votanti)) -
     sum(xlsx_to_numeric(tmp2010$invalide))) * 100

tmp2012 <- read.xlsx2("m5s-elections.xlsx", # Not included
                      sheetIndex = 2,
                      endRow = 28)

# 2012: votes over voters, skipping cells that do not parse as numbers.
# NOTE(review): unlike 2010, invalid ballots are not subtracted from the
# denominator here -- confirm the two figures are meant to be comparable.
perc_admin_elections_12 <-
  sum(xlsx_to_numeric(tmp2012$Voti), na.rm = TRUE) /
  sum(xlsx_to_numeric(tmp2012$Votanti), na.rm = TRUE) * 100

# Drop the temporaries. The previous rm(tmp) named an object that was never
# created, so it only warned and left both sheets in the workspace.
rm(tmp2010, tmp2012, xlsx_to_numeric)

perc_gener_election_13 <- 25.56
perc_europ_election_14 <- 21.16

# One row per reference date; election_perc is stored as a proportion (0-1)
# and is NA for dates that are not elections with a known result.
date_df <- data.frame(date = c(date_min,
                               date_vday_07,
                               date_admin_election_10,
                               date_admin_elections_12,
                               date_gener_election_13,
                               date_europ_election_14,
                               date_max),
                      election_perc = (c(NA,
                                         NA,
                                         perc_admin_elections_10,
                                         perc_admin_elections_12,
                                         perc_gener_election_13,
                                         perc_europ_election_14,
                                         NA)) / 100,
                      stringsAsFactors = FALSE)

# Common x-axis range shared by all the time-series plots.
x_limits <- c(as.Date("2004-01-01"), as.Date("2016-01-31"))

# Source
## Polls
# encoding must be "UTF-8" (or "latin1") to have any effect; the previous
# value 'utf8' is not recognised and was silently ignored.
# NOTE(review): dec = "\t" sets the decimal mark to a tab, which looks like a
# slip for sep = "\t". It is left untouched because the file layout is not
# visible here; perc is converted explicitly below in any case.
polls <- read.csv("Sondaggi_data.csv", # Not included
                  dec = "\t", encoding = "UTF-8", stringsAsFactors = FALSE)
names(polls) <- c("date", "perc", "institute", "party")
polls$date <-
  as.Date(polls$date, format = "%d/%m/%Y %H:%M:%S")
# Percent -> proportion, to match date_df$election_perc and scales::percent.
polls$perc <- as.numeric(polls$perc) / 100

# M5S poll results (small points + smoother) with the actual election results
# overlaid as large points; dotted lines mark the 2012, 2013 and 2014 votes.
# An `alpha = .8` argument used to be passed to ggplot() itself, where it has
# no effect; set it on geom_point() if transparency is actually wanted.
g_polls_elect <-
  ggplot(subset(polls, party == "M5S"),
         aes(x = date, y = perc)) +
  geom_point(aes(shape = "poll"), size = 0.5) +
  geom_point(data = date_df,
             aes(x = date, y = election_perc, shape = "vote"),
             size = 3) +
  stat_smooth(geom = "smooth", se = FALSE) +
  geom_vline(xintercept = as.numeric(c(date_admin_elections_12,
                                       date_gener_election_13,
                                       date_europ_election_14)),
             linetype = "dotted") +
  scale_x_date(limits = x_limits) +
  labs(x = NULL, y = "National vote") +
  scale_y_continuous(labels = scales::percent) +
  scale_shape_manual(values = c(16, 13)) +
  theme(legend.position = c(0.2, 0.5)) +
  guides(shape = guide_legend(title = NULL))

# Media
# NOTE(review): this executes code downloaded at run time from the master
# branch of a GitHub repository; consider vendoring or pinning it. The
# sqlite* helpers used below are presumably defined by this file.
source('https://raw.githubusercontent.com/fraba/R_cheatsheet/master/database.R')
db15 <- 'm5s_newsmedia_hits_mar2016.sqlite'
db16 <- 'm5s_newsmedia_hits_jun2016.sqlite'
sqliteListTables(db16)

## GRILLO
# Stack the "beppe" hit tables of both newspapers from both database files.
# NOTE(review): if the two files cover overlapping periods, the same
# (from_date, source) key appears more than once and the merge below will
# multiply those rows -- verify against the data.
media_bg_ts <- rbind(sqliteGetTable(db15, 'corriere_beppe'),
                     sqliteGetTable(db15, 'repubblica_beppe'),
                     sqliteGetTable(db16, 'corriere_beppe'),
                     sqliteGetTable(db16, 'repubblica_beppe'))
media_politica_ts <- rbind(sqliteGetTable(db15, 'corriere_politica'),
                           sqliteGetTable(db15, 'repubblica_politica'),
                           sqliteGetTable(db16, 'corriere_politica'),
                           sqliteGetTable(db16, 'repubblica_politica'))
# Rename the 6th column so it does not clash with media_bg_ts$hits in the
# merge. NOTE(review): positional rename -- presumably column 6 is `hits`.
names(media_politica_ts)[6] <- 'politica_hits'
media_bg_ts <- merge(media_bg_ts,
                     media_politica_ts[, c('from_date', 'source', 'politica_hits')],
                     by = c('from_date', 'source'))
# Share of "politica" hits that also match the Grillo query.
media_bg_ts$ratio <- with(media_bg_ts, hits / politica_hits)
# 0/0 gives NaN, which is.na() also catches; a non-zero count over zero (Inf)
# is left as is.
media_bg_ts$ratio[is.na(media_bg_ts$ratio)] <- 0
media_bg_ts$from_date <- as.Date(media_bg_ts$from_date)

library(zoo)
library(dplyr)
# Centred rolling means of the ratio, computed separately for each source in
# date order. The window names suggest weekly rows -- TODO confirm.
media_bg_ts <-
  media_bg_ts %>%
  dplyr::group_by(source) %>%
  dplyr::arrange(from_date) %>%
  dplyr::mutate(roll4weeks = rollmean(ratio, 4, fill = NA),
                roll8weeks = rollmean(ratio, 8, fill = NA),
                roll12weeks = rollmean(ratio, 12, fill = NA),
                roll52weeks = rollmean(ratio, 52, fill = NA)) %>%
  # Drop the grouping so later operations are not silently done per source.
  dplyr::ungroup()

# Mean Grillo/politica ratio within each period delimited by two consecutive
# reference dates, overall ("all") and per newspaper. Row i of date_df holds
# the mean for the period [date[i], date[i + 1]); the last row stays NA.
date_df$mean_bg_all <- NA_real_
date_df$mean_bg_corriere <- NA_real_
date_df$mean_bg_repubblica <- NA_real_

# seq_len() (guarded by max) instead of 1:(n - 1), which counts backwards
# when date_df has fewer than two rows.
for (i in seq_len(max(nrow(date_df) - 1, 0))) {
  win_from <- date_df$date[i]
  win_to <- date_df$date[i + 1]
  # Filter on the period once, then split by source. Rows must start on or
  # after the period start and end strictly before the next reference date.
  in_window <- subset(media_bg_ts,
                      from_date >= win_from & to_date < win_to)
  for (s in c("corriere", "repubblica", "all")) {
    tmp_df <- if (s == "all") in_window else subset(in_window, source == s)
    # mean() of an empty selection is NaN, so periods without data show NaN.
    date_df[[paste0("mean_bg_", s)]][i] <- mean(tmp_df$ratio)
  }
}

# Press attention to Beppe Grillo: 12-row rolling mean of the ratio, one line
# per newspaper, with the four key dates marked by dotted lines and numbered
# (1)-(4) just to the right of each line.
g_media_bg <-
  ggplot() +
  # geom_smooth(data = media_bg_ts, aes(x=from_date, y=roll12weeks), se = FALSE) +
  geom_line(data = media_bg_ts,
            aes(x = from_date, y = roll12weeks, colour = source)) +
  # One vectorised layer instead of four copy-pasted geom_vline() calls.
  geom_vline(xintercept = as.numeric(c(date_vday_07,
                                       date_admin_elections_12,
                                       date_gener_election_13,
                                       date_europ_election_14)),
             linetype = "dotted") +
  # Labels are shifted 30 days right of their line and placed at y = 0.2.
  geom_text(data = data.frame(x = c(date_vday_07,
                                    date_admin_elections_12,
                                    date_gener_election_13,
                                    date_europ_election_14) + 30,
                              y = 0.2,
                              label = c('(1)', '(2)', '(3)', '(4)')),
            aes(x, y, label = label)) +
  # Disabled: horizontal segments showing the mean ratio of each period, i.e.
  #   geom_segment(aes(x = date_df$date[k], xend = date_df$date[k + 1],
  #                    y = date_df$mean_bg_all[k], yend = date_df$mean_bg_all[k]))
  # for k = 1..5, with the first segment starting at x_limits[1] instead.
  scale_x_date(limits = x_limits) +
  labs(x = "Press attention to Beppe Grillo", y = 'Articles', colour = NULL) +
  theme(legend.position = c(0.2, 0.5)) +
  scale_y_continuous(labels = scales::percent)

## M5S
# Stack the "movimento" hit tables of both newspapers from both database
# files (the *_movimento1 variants are deliberately left out).
media_m5s_ts <- rbind(sqliteGetTable(db15, 'corriere_movimento'),
                      sqliteGetTable(db15, 'repubblica_movimento'),
                      #sqliteGetTable(db15, 'corriere_movimento1'),
                      #sqliteGetTable(db15, 'repubblica_movimento1'),
                      sqliteGetTable(db16, 'corriere_movimento'),
                      sqliteGetTable(db16, 'repubblica_movimento')
                      #sqliteGetTable(db16, 'corriere_movimento1'),
                      #sqliteGetTable(db16, 'repubblica_movimento1')
)
# Rebuilt with the same calls as in the Grillo section, so this section can
# be run on its own.
media_politica_ts <- rbind(sqliteGetTable(db15, 'corriere_politica'),
                           sqliteGetTable(db15, 'repubblica_politica'),
                           sqliteGetTable(db16, 'corriere_politica'),
                           sqliteGetTable(db16, 'repubblica_politica'))
# Rename the 6th column so it does not clash with media_m5s_ts$hits in the
# merge. NOTE(review): positional rename -- presumably column 6 is `hits`.
names(media_politica_ts)[6] <- 'politica_hits'
media_m5s_ts <- merge(media_m5s_ts,
                      media_politica_ts[, c('from_date', 'source', 'politica_hits')],
                      by = c('from_date', 'source'))
# Share of "politica" hits that also match the M5S query.
media_m5s_ts$ratio <- with(media_m5s_ts, hits / politica_hits)
# 0/0 gives NaN, which is.na() also catches; a non-zero count over zero (Inf)
# is left as is.
media_m5s_ts$ratio[is.na(media_m5s_ts$ratio)] <- 0
media_m5s_ts$from_date <- as.Date(media_m5s_ts$from_date)

library(zoo)
library(dplyr)
# Centred rolling means of the ratio, computed separately for each source in
# date order. The window names suggest weekly rows -- TODO confirm.
media_m5s_ts <-
  media_m5s_ts %>%
  dplyr::group_by(source) %>%
  dplyr::arrange(from_date, source) %>%
  dplyr::mutate(roll4weeks = rollmean(ratio, 4, fill = NA),
                roll8weeks = rollmean(ratio, 8, fill = NA),
                roll12weeks = rollmean(ratio, 12, fill = NA),
                roll52weeks = rollmean(ratio, 52, fill = NA)) %>%
  # Drop the grouping so later operations are not silently done per source.
  dplyr::ungroup()

# Mean M5S/politica ratio within each period delimited by two consecutive
# reference dates, overall ("all") and per newspaper. Row i of date_df holds
# the mean for the period [date[i], date[i + 1]); the last row stays NA.
date_df$mean_m5s_all <- NA_real_
date_df$mean_m5s_corriere <- NA_real_
date_df$mean_m5s_repubblica <- NA_real_

# seq_len() (guarded by max) instead of 1:(n - 1), which counts backwards
# when date_df has fewer than two rows.
for (i in seq_len(max(nrow(date_df) - 1, 0))) {
  win_from <- date_df$date[i]
  win_to <- date_df$date[i + 1]
  # Filter on the period once, then split by source. Rows must start on or
  # after the period start and end strictly before the next reference date.
  in_window <- subset(media_m5s_ts,
                      from_date >= win_from & to_date < win_to)
  for (s in c("corriere", "repubblica", "all")) {
    tmp_df <- if (s == "all") in_window else subset(in_window, source == s)
    # mean() of an empty selection is NaN, so periods without data show NaN.
    date_df[[paste0("mean_m5s_", s)]][i] <- mean(tmp_df$ratio)
  }
}

# Press attention to the M5S: 12-row rolling mean of the ratio, one line per
# newspaper, with the four key dates marked by dotted vertical lines.
g_media_m5s <-
  ggplot() +
  # geom_smooth(data = media_m5s_ts, aes(x=from_date, y=roll12weeks), se = FALSE) +
  geom_line(data = media_m5s_ts,
            aes(x = from_date, y = roll12weeks, colour = source)) +
  # One vectorised layer instead of four copy-pasted geom_vline() calls.
  geom_vline(xintercept = as.numeric(c(date_vday_07,
                                       date_admin_elections_12,
                                       date_gener_election_13,
                                       date_europ_election_14)),
             linetype = "dotted") +
  # Disabled: horizontal segments showing the mean ratio of each period, i.e.
  #   geom_segment(aes(x = date_df$date[k], xend = date_df$date[k + 1],
  #                    y = date_df$mean_m5s_all[k], yend = date_df$mean_m5s_all[k]))
  # for k = 1..5, with the first segment starting at x_limits[1] instead.
  scale_x_date(limits = x_limits) +
  theme(legend.position = c(0.2, 0.5)) +
  labs(x = "Press attention to the M5S", y = 'Articles', colour = NULL) +
  scale_y_continuous(labels = scales::percent)

## Blog
# NOTE(review): user-specific path, not portable. The file presumably
# provides `indices_mean`, which is used right below and defined nowhere else
# in view.
load("~/Dropbox/Thesis_PhD/trento_2016/data/blog_indices.RData")

# Weekly populism index of the blog: points plus default smoother, with the
# four key dates marked by dotted vertical lines.
g_blog_populism <-
  ggplot(indices_mean, aes(x = as.Date(week), y = weekmean_populism)) +
  geom_point(size = 0.5) +
  geom_smooth() +
  scale_y_continuous(labels = scales::percent) +
  scale_x_date(limits = x_limits) +
  labs(x = NULL, y = "blog's populism") +
  # One vectorised layer instead of four copy-pasted geom_vline() calls.
  geom_vline(xintercept = as.numeric(c(date_vday_07,
                                       date_admin_elections_12,
                                       date_gener_election_13,
                                       date_europ_election_14)),
             linetype = "dotted")

# Same layout for the second index.
# NOTE(review): the column is called weekmean_antipop while the axis says
# "antipolitics" -- confirm this is the intended column.
g_blog_antipolitics <-
  ggplot(indices_mean, aes(x = as.Date(week), y = weekmean_antipop)) +
  geom_point(size = 0.5) +
  geom_smooth() +
  scale_y_continuous(labels = scales::percent) +
  scale_x_date(limits = x_limits) +
  labs(x = NULL, y = "blog's antipolitics") +
  geom_vline(xintercept = as.numeric(c(date_vday_07,
                                       date_admin_elections_12,
                                       date_gener_election_13,
                                       date_europ_election_14)),
             linetype = "dotted")


# library() fails immediately if the package is missing, whereas require()
# only returns FALSE and lets the script break later.
library(gridExtra)
# NOTE(review): absolute, user-specific path, not portable. The file
# presumably provides `m5s_newscast_attention`, which is used right below.
load('/Users/francesco/Dropbox/Thesis_PhD/data/agcom/m5s_newscast_attention.RData')

# TV attention to the M5S: the daily_rollmean60 series as a line, with the
# four key dates marked by dotted vertical lines.
g_tvnews <-
  ggplot(m5s_newscast_attention,
         aes(x = date, y = daily_rollmean60)) +
  geom_line() +
  # geom_smooth(se=FALSE) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_date(limits = x_limits) +
  labs(x = "TV attention to the M5S", y = "Air time") +
  # One vectorised layer instead of four copy-pasted geom_vline() calls.
  geom_vline(xintercept = as.numeric(c(date_vday_07,
                                       date_admin_elections_12,
                                       date_gener_election_13,
                                       date_europ_election_14)),
             linetype = "dotted")

# Turn every plot into a grob. The first four are listed in the order in
# which they are stacked below; the two blog panels are converted as well
# but only appear in the commented-out alternative layout.
gB <- ggplotGrob(g_media_bg)
gC <- ggplotGrob(g_media_m5s)
gF <- ggplotGrob(g_tvnews)
gA <- ggplotGrob(g_polls_elect)
gD <- ggplotGrob(g_blog_populism)
gE <- ggplotGrob(g_blog_antipolitics)

# Stack the panels vertically, taking the column sizes from the last grob.
# gtable <- rbind(gD, gE, gB, gC, gA, size="last")
gtable <- rbind(gB, gC, gF, gA, size = "last")

# Layout row (t) of every panel cell; only needed by the disabled height
# adjustment below. The figure is not drawn here.
id <- with(gtable$layout, t[name == "panel"])
# gtable$heights[id] <- lapply(c(10,5), "unit", "null")
# grid::grid.draw(gtable)

# Averages
# date_from <- as.Date('2000-01-01')
# date_to <- as.Date('2013-02-25')
# 
# corriere_SegOpp <- sqliteGetTable(db15, 'corriere_SegOpp')
# repubblica_SegOpp <- sqliteGetTable(db15, 'repubblica_SegOpp')
# 
# corriere_beppe <- sqliteGetTable(db15, 'corriere_beppe')
# repubblica_beppe <- sqliteGetTable(db15, 'repubblica_beppe')
# 
# merge_cor <- merge(corriere_SegOpp, corriere_beppe, by = c('from_date', 'to_date'))
# mean(with(subset(merge_cor, 
#                  from_date >= date_from & to_date <= date_to & hits.x > 0), hits.y / hits.x), na.rm = T)


